The goal of this lab is to explore methods for annotating ggplot2 plots and positioning elements within them. This lab also makes heavier use of the scale_* functions, which are covered in our next reading. In fact, students may find it useful to read through Chapter 11, “Colour scales and legends”.
Datasets
We’ll be using the blue_jays.rda, titanic.rda, Aus_athletes.rda, and tech_stocks.rda datasets.
Using the blue_jays.rda dataset, recreate the following graphic as precisely as possible.
Hints:
Transparency is 0.8
Point size 2
Create a label_info dataset that is a subset of original data, just with the 2 birds to be labeled
Shift label text horizontally by 0.5
See Section 8.3, “Building custom annotations”, of the ggplot2 textbook
Annotation size is 4
Classic theme
Solution
Code
# Exercise 1: head length versus body mass for the blue jays ----

# Anchor for the in-plot caption: smallest observed mass (x) and largest
# observed head length (y), i.e. the top-left corner of the data region.
x_range <- range(blue_jays$Mass)
y_range <- range(blue_jays$Head)
caption <- "Head length versus body mass for 123 blue jays"

# Rows for the two birds that receive a text label via geom_text().
label_info <- blue_jays |>
  filter(BirdID %in% c("1142-05914", "702-90567"))

# Build the plot: points coloured by sex, two labelled birds, and the caption
# drawn inside the panel.
ggplot(blue_jays, aes(x = Mass, y = Head, color = KnownSex)) +
  geom_point(size = 2, alpha = 0.8, show.legend = FALSE) +
  geom_text(
    data = label_info,
    mapping = aes(label = KnownSex),
    nudge_x = 0.5, # shift the label to the right of its point
    show.legend = FALSE
  ) +
  annotate(
    geom = "text",
    x = x_range[1], # minimum of Mass
    y = y_range[2], # maximum of Head
    label = caption,
    hjust = 0, # left-align at the anchor
    vjust = 1, # hang the text below the anchor
    size = 4 # annotation size called for by the exercise hints
  ) +
  labs(x = "Body mass (g)", y = "Head length (mm)") +
  theme_classic()
Exercise 2
Using the tech_stocks dataset, recreate the following graphics as precisely as possible. Use the column price_indexed.
Plot 1
Hints:
Create a label_info dataset that is a subset of original data, just containing the last day’s information for each of the 4 stocks
serif font
Annotation size is 4
Solution
Code
# Exercise 2, plot 1: indexed stock prices with plain text labels ----

# Anchor for the in-plot caption: earliest date (x) and highest indexed
# price (y), i.e. the top-left corner of the data region.
x_rangee2p1 <- range(tech_stocks$date)
y_rangee2p1 <- range(tech_stocks$price_indexed)
captione2p1 <- "Stock price over time for four major tech companies"

# One row per company: its most recent observation. Derived from the data
# rather than matching the date column against the literal "2017-06-02"
# (presumably the final day in the dataset -- confirm), so the labels stay on
# the line ends regardless of how `date` is stored.
label_infoe2p1 <- tech_stocks |>
  group_by(company) |>
  filter(date == max(date)) |>
  ungroup()

ggplot(tech_stocks, aes(x = date, y = price_indexed, color = company)) +
  geom_line(show.legend = FALSE) +
  geom_text(
    data = label_infoe2p1,
    mapping = aes(label = company),
    color = "black",
    family = "serif"
  ) +
  # annotate() has no show.legend argument (annotations never get a legend),
  # so it is not passed here.
  annotate(
    geom = "text",
    x = x_rangee2p1[1], # earliest date
    y = y_rangee2p1[2], # highest indexed price
    label = captione2p1,
    hjust = 0,
    vjust = 1,
    family = "serif",
    size = 4
  ) +
  labs(x = "Date", y = "Stock price, indexed") +
  theme_minimal()
Plot 2
Hints:
Package ggrepel
box.padding is 0.6
Minimum segment length is 0
Horizontal justification is to the right
seed of 9876
Annotation size is 4
serif font
Solution
Code
# Exercise 2, plot 2: indexed stock prices with repelled labels (ggrepel) ----

# Anchor for the in-plot caption: earliest date (x) and highest indexed
# price (y), i.e. the top-left corner of the data region.
x_rangee2p1 <- range(tech_stocks$date)
y_rangee2p1 <- range(tech_stocks$price_indexed)
captione2p1 <- "Stock price over time for four major tech companies"

# One row per company: its most recent observation. Derived from the data
# rather than matching the date column against the literal "2017-06-02"
# (presumably the final day in the dataset -- confirm).
label_infoe2p1 <- tech_stocks |>
  group_by(company) |>
  filter(date == max(date)) |>
  ungroup()

ggplot(tech_stocks, aes(x = date, y = price_indexed, color = company)) +
  geom_line(show.legend = FALSE) +
  geom_text_repel(
    data = label_infoe2p1,
    mapping = aes(label = company),
    box.padding = 0.6,
    min.segment.length = 0, # always draw the connecting segment
    hjust = 1, # right-justify the label text
    seed = 9876, # fixed seed so the repelled layout is reproducible
    color = "black",
    family = "serif",
    show.legend = FALSE
  ) +
  # annotate() has no show.legend argument (annotations never get a legend),
  # so it is not passed here.
  annotate(
    geom = "text",
    x = x_rangee2p1[1], # earliest date
    y = y_rangee2p1[2], # highest indexed price
    label = captione2p1,
    hjust = 0,
    vjust = 1,
    family = "serif",
    size = 4
  ) +
  labs(x = "Date", y = "Stock price, indexed") +
  theme_minimal()
Exercise 3
Using the titanic.rda dataset, recreate the following graphic as precisely as possible.
Hints:
Create a new variable that uses died and survived as levels/categories
Hex colors: #D55E00D0, #0072B2D0 (no alpha is being used)
Solution
Code
# Exercise 3: Titanic passenger counts by sex, class, and survival ----

# Add survival_status: "survived" where survived == 1, otherwise "died",
# stored as a factor with "died" as the first level.
titanic <- titanic |>
  mutate(
    survival_status = ifelse(survived == 1, "survived", "died"),
    survival_status = factor(survival_status, levels = c("died", "survived"))
  )

# Bar chart of counts per sex, faceted with survival status in rows and
# passenger class in columns; the legend is hidden.
ggplot(titanic, aes(x = sex, fill = sex)) +
  geom_bar(position = "dodge") +
  facet_grid(rows = vars(survival_status), cols = vars(class)) +
  scale_fill_manual(
    values = c("female" = "#D55E00D0", "male" = "#0072B2D0")
  ) +
  labs(y = "count") +
  theme_minimal() +
  theme(legend.position = "none")
Exercise 4
Use the athletes_dat dataset — extracted from Aus_athletes.rda — to recreate the following graphic as precisely as possible. Create the graphic twice: once using patchwork and once using cowplot.
Code
# Sports that appear for BOTH sexes ----
both_sports <- Aus_athletes |>
  # one row per unique (sex, sport) combination
  distinct(sex, sport) |>
  # number of distinct sexes recorded for each sport
  count(sport) |>
  # keep only sports recorded for two sexes
  filter(n == 2) |>
  # return the sport names as a vector
  pull(sport)

# Analysis data ----
athletes_dat <- Aus_athletes |>
  # restrict to the sports found above
  filter(sport %in% both_sports) |>
  # fold the two track events into a single "track" category; every other
  # sport keeps its name (case_when will be very useful with shiny apps)
  mutate(
    sport = case_when(
      sport %in% c("track (400m)", "track (sprint)") ~ "track",
      TRUE ~ sport
    )
  )
Hints:
Build each plot separately
Bar plot: lower limit 0, upper limit 95
Bar plot: shift bar labels by 5 units and top justify
Bar plot: label size is 5
Bar plot: #D55E00D0 & #0072B2D0 — no alpha
Scatterplot: #D55E00D0 & #0072B2D0 — no alpha
Scatterplot: filled circle with “white” outline; size is 3
Scatterplot: rcc is red blood cell count; wcc is white blood cell count
Boxplot: outline #D55E00 and #0072B2; shading #D55E0040 and #0072B240
Boxplot: should be made narrower; 0.5
Boxplot: Legend is in top-right corner of bottom plot
Boxplot: Space out labels c("female ", "male")
Boxplot: Legend shading matches hex values for top two plots
Using patchwork
Solution
Code
# Exercise 4 (patchwork): bar plot + scatterplot over a boxplot ----

# Counts per sex, used to print the number inside each bar.
label_info <- athletes_dat |>
  count(sex)

# Bar plot: number of athletes per sex.
plot_1_bar_plot <- ggplot(athletes_dat, aes(x = sex, fill = sex)) +
  geom_bar() +
  geom_text(
    data = label_info,
    mapping = aes(y = n, label = n),
    size = 5,
    vjust = 1, # top-justify the label ...
    nudge_y = -5 # ... and drop it 5 units below the top of the bar
  ) +
  scale_fill_manual(
    values = c("#D55E00D0", "#0072B2D0"),
    guide = "none"
  ) +
  scale_x_discrete(name = NULL, labels = c("female", "male")) +
  scale_y_continuous(
    name = "number",
    limits = c(0, 95),
    expand = c(0, 0) # bars start exactly at the axis
  ) +
  theme_minimal()

# Scatterplot: white blood cell count against red blood cell count.
plot_2_scatter <- ggplot(athletes_dat, aes(x = rcc, y = wcc, fill = sex)) +
  # shape 21 is a filled circle: `fill` is the interior, `color` the outline
  geom_point(shape = 21, color = "white", size = 3) +
  scale_fill_manual(
    values = c("#D55E00D0", "#0072B2D0"),
    guide = "none"
  ) +
  labs(x = "RBC count", y = "WBC count") +
  theme_minimal()

# Boxplot: percent body fat by sport and sex.
plot_3_boxplot <- ggplot(
  athletes_dat,
  aes(x = sport, y = pcBfat, fill = sex, color = sex)
) +
  geom_boxplot(width = 0.5) +
  # Outline colours are the fully opaque hex values given in the exercise
  # hints (#D55E00 / #0072B2), not the semi-transparent "...D0" variants.
  scale_color_manual(
    values = c("#D55E00", "#0072B2"),
    guide = "none"
  ) +
  scale_fill_manual(
    name = NULL,
    values = c("#D55E0040", "#0072B240"),
    labels = c("female    ", "male") # trailing spaces separate the legend keys
  ) +
  # Legend keys use the fill colours of the two upper plots, with no outline.
  guides(
    fill = guide_legend(
      override.aes = list(
        color = NA,
        fill = c("#D55E00D0", "#0072B2D0")
      )
    )
  ) +
  labs(x = NULL, y = "% body fat") +
  theme_minimal() +
  # Horizontal legend anchored to the top-right corner of the panel.
  theme(
    legend.justification = c(1, 1),
    legend.position = c(1, 1),
    legend.direction = "horizontal"
  )

# Assemble with patchwork: bar plot and scatterplot side by side on top,
# boxplot across the bottom.
(plot_1_bar_plot + plot_2_scatter) / plot_3_boxplot